/* ********************************************************************************* */
/* 1 ****************** Control variables for robustness tests ********************* */
/* ********************************************************************************* */
/* Summary   :  Code builds variables used in robustness tests                        */
/* ********************************************************************************* */

	LIBNAME cow 'C:\cow';
	LIBNAME data 'C:\data';


************************;
* GET COMMON OWNERSHIP *;
************************;

* 13F data;
	
	* supplement stock price;
	/*Pull data from CRSP 'Monthly Stock - Securities' file, downloaded from /wrds/crsp/sasdata/m_stock */
	data msf; set data.msf; cusip6=substr(cusip,1,6); keep date cusip6 prc shrout; rename date=rdate; rename prc=prc_s; run;
	data inst13f_nonmissing; set cow.inst13f; if prc~=. and shrout2~=.; 
	if cusip~=""; cusip6=substr(cusip,1,6); drop cusip typecode; run;
	data inst13f_missing; set cow.inst13f; if prc=. or shrout2=.; if cusip~=""; 
	cusip6=substr(cusip,1,6); drop cusip typecode; run;
	proc sql; create table inst13f_prc as select * from inst13f_missing as a left join msf as b 
	on a.cusip6 = b.cusip6 and a.rdate=b.rdate; quit; 
	data inst13f_missing_fixed; set inst13f_prc; 
	if prc=. then prc=prc_s; 
	if shrout2=. then shrout2=shrout*1000; drop prc_s shrout; run;
	data inst13f; set inst13f_nonmissing inst13f_missing_fixed; run;
	proc sql; drop table inst13f_nonmissing, inst13f_missing, inst13f_missing_fixed, msf, Inst13f_prc; quit;


	data s34master; set inst13f; 
	PROPOWN=SHARES/(SHROUT2*1000); MVOWN=SHARES*PRC; 
	keep MGRNO RDATE CUSIP6 MVOWN PROPOWN; 
	run;
	proc sort data=s34master nodupkey; by MGRNO RDATE CUSIP6 MVOWN PROPOWN; run; 

* INST relative holding;
	proc sort data=s34master; by RDATE MGRNO CUSIP6; run;
	proc means noprint data=s34master; by RDATE MGRNO CUSIP6; var PROPOWN MVOWN;
	output out=temp_agg (drop=_:) sum=PROPOWN MVOWN; run;
	proc means noprint data=temp_agg; by RDATE MGRNO; var MVOWN;
	output out=sumMVOWN (drop=_:) sum=sumMVOWN; run;
	data temp_sum; merge temp_agg sumMVOWN; by RDATE MGRNO; PROPHLD=MVOWN/sumMVOWN; drop MVOWN sumMVOWN; 
	proc sort data=temp_sum; by RDATE MGRNO PROPHLD; run;
	proc rank data=temp_sum groups=100 out=temp_sum_rank; by RDATE MGRNO; var PROPHLD; ranks PROPHLD_rank; run; 

	proc sql; drop table temp_agg, temp_sum, Summvown; quit;

	proc sort data=temp_sum_rank; by rdate cusip6; run;
	data temp_sum_rank; set  temp_sum_rank; format rdate date9.; run;

	proc sql; drop table inst13f, s34master; quit;



/* Calculate "common ownership" the average common ownership between the firm and all other firms 
   with common ownership, which is defined following Gilje, Gormley, and Levit (2020).
   We require that common investors own at least 1% of the number of shares outstanding in the firms 
   and assume a linear function for investor attention */


* create row index for date;

	data header_s34index; set temp_sum_rank; if PROPOWN>=0.01;
	keep rdate ; proc sort nodupkey; by rdate ; run;
	data header_s34index; set header_s34index; rdate_row=_N_; run;

* create the other firm info;
	data firm_b; merge temp_sum_rank header_s34index; by rdate; if PROPOWN>=0.01; rename cusip6=cusip6_b; rename PROPOWN=PROPOWN_b; drop PROPHLD; run;

* create the main firm info;
	data master; merge temp_sum_rank header_s34index; by rdate; if PROPOWN>=0.01; run;


* calculate common ownership by joining the other firm into the mainfirm pairwise;
	
	filename junk dummy; proc printto  log=junk; run;

	%macro co;
    %do i = 1 %to 92;

	data master_loop; set master; if rdate_row=&i; run;
	data firm_b_loop; set firm_b; if rdate_row=&i; run;

	proc sql; create table joint as 
	select distinct a.rdate_row, a.rdate, a.cusip6, b.cusip6_b, count(a.propown) as n_cinst, sum(a.propown*a.prophld*b.propown_b) as cown 
	from master_loop as a, firm_b_loop as b
	where a.RDATE=b.RDATE and a.MGRNO=b.MGRNO
	group by a.rdate, a.cusip6, b.cusip6_b
	having a.cusip6~=b.cusip6_b
	order by a.rdate, a.cusip6, b.cusip6_b; quit;

	proc means noprint data=joint; by rdate_row rdate cusip6; var n_cinst cown;
	output out=co(drop=_type_) mean=n_cinst_mean cown_mean; run;
	data co_&i; set co; rename _freq_=n_cfirm_mean; run;

	%end;

	data cow.co; set %do i = 1 %to 92; co_&i %end; ; run;
	proc sql; %do i = 1 %to 92; drop table co_&i; %end; quit; run;

	%mend; %co; run;

	proc printto; run;
	

****************************************;
* GET INST OWNERSHIP BY ACTIVE/PASSIVE *;
****************************************;


* download institutional classification data from Bushee website
  https://accounting-faculty.wharton.upenn.edu/bushee/  ;
	proc import datafile="C:\data\iiclass19812018.xlsx" 
	out=iiclass dbms=xlsx replace; getnames=yes; run;
	data iiclass; set iiclass; 
	if perminvtype="DED" then active=1; else if perminvtype in ("QIX","TRA") then active=0;
	keep mgrno invyear active; run;

* get Inst Classification;
	data own; set temp_sum_rank; drop prophld PROPHLD_rank; run;

	proc sql; create table own1 as 
	select a.rdate, a.cusip6, a.propown as ownership, 
					a.propown*b.active as activeownership, 
				    a.propown*(1-b.active) as passiveownership 
	from own as a left join iiclass as b 
	on a.MGRNO = b.MGRNO and year(a.rdate)=b.invyear; quit;
	proc sort; by rdate cusip6; run;
	data own1; set own1; by rdate cusip6; if first.rdate or first.cusip6; run;

	proc means noprint data=own1; by rdate cusip6;
	var ownership activeownership passiveownership;
	output out=instown(drop=_:) sum=; run;
	proc sql; drop table own, own1; quit;
	

	data own1; set temp_sum_rank; drop prophld PROPHLD_rank; if propown>=0.01; run;

	proc sql; create table own11 as 
	select a.rdate, a.cusip6, a.propown as ownership1, 
					a.propown*b.active as activeownership1, 
				    a.propown*(1-b.active) as passiveownership1
	from own1 as a left join iiclass as b 
	on a.MGRNO = b.MGRNO and year(a.rdate)=b.invyear; quit;
	proc sort; by rdate cusip6; run;
	data own11; set own11; by rdate cusip6; if first.rdate or first.cusip6; run;

	proc means noprint data=own11; by rdate cusip6;
	var ownership1 activeownership1 passiveownership1;
	output out=instown1(drop=_:) sum=; run;
	proc sql; drop table own1, own11; quit;


	data own5; set temp_sum_rank; drop prophld PROPHLD_rank; if propown>=0.05; run;

	proc sql; create table own51 as 
	select a.rdate, a.cusip6, a.propown as ownership5, 
					a.propown*b.active as activeownership5, 
				    a.propown*(1-b.active) as passiveownership5 
	from own5 as a left join iiclass as b 
	on a.MGRNO = b.MGRNO and year(a.rdate)=b.invyear; quit;
	proc sort; by rdate cusip6; run;
	data own51; set own51; by rdate cusip6; if first.rdate or first.cusip6; run;

	proc means noprint data=own51; by rdate cusip6;
	var ownership5 activeownership5 passiveownership5;
	output out=instown5(drop=_:) sum=; run;
	proc sql; drop table own5, own51; quit;

	data cow.instown; merge instown instown1 instown5; by rdate cusip6; run;



********************************************;
* Get the number of business or geo segments;
********************************************;


* download segment data from WRDS Compustat save as segments_raw SAS data file
  https://wrds-www.wharton.upenn.edu/pages/get-data/compustat-capital-iq-standard-poors/compustat/historical-segments-daily/historical-segments/ ; 
	proc sort data=cow.segments_raw; by gvkey datadate; run;
	data segments_raw; set cow.segments_raw;
	if STYPE="BUSSEG" then busseg=1; else busseg=0;
	if STYPE="GEOSEG" then geoseg=1; else geoseg=0;
	gvkey1=gvkey*1; drop gvkey; rename gvkey1=gvkey;
	run;
	proc means data=segments_raw noprint; by gvkey datadate; var busseg geoseg;
	output out=cow.segments (drop=_:) sum=; run;


***********************************;
* Get firms paying dividends or not;
***********************************;

* download dividend data from WRDS CRSP Stock Events - Distribution Information, save as dividends_raw SAS data file
  https://wrds-www.wharton.upenn.edu/pages/get-data/center-research-security-prices-crsp/annual-update/stock-events/distribution/ ; 
	data div; set cow.dividends_raw; divyr=year(exdt); keep permno divyr; proc sort nodupkey; by permno divyr; run;
	data div_first; set div; rename divyr=divyr_first; proc sort; by permno divyr_first; run;
	data div_first; set div_first; by permno; if first.permno; run;
	data div_last; set div; rename divyr=divyr_last; proc sort; by permno descending divyr_last; run;
	data div_last; set div_last; by permno; if first.permno; run;
	data cow.divpaying; merge div div_first div_last; by permno; if divyr~=. then div=1; else div=0; run;


* download CRSP/Compustat merged header file from WRDS CRSP/Compustat Merged - Fundamentals Annual;
* https://wrds-www.wharton.upenn.edu/pages/get-data/center-research-security-prices-crsp/annual-update/crspcompustat-merged/fundamentals-annual/ ; 

	* merge all extra data into header file;
	data header; set data.Headercrspcomp; if year(datadate)>=1990; permno=lpermno; gvkey1=gvkey*1; drop gvkey; rename gvkey1=gvkey;
	if 10<=month(datadate)<=12 then rdate=mdy(12,31,year(datadate));
	if 7<=month(datadate)<=9 then rdate=mdy(9,30,year(datadate));
	if 4<=month(datadate)<=6 then rdate=mdy(6,30,year(datadate));
	if 1<=month(datadate)<=3 then rdate=mdy(3,31,year(datadate));
	keep gvkey1 permno cusip datadate fyear rdate; run;
	data header; set header; row=_n_; run;

	data instcoown; merge cow.instown cow.co; by rdate cusip6; run;
 
	proc sql; create table header_inst as select *
	from header as a left join instcoown as b 
	on substr(a.cusip,1,6)=b.cusip6 and a.rdate=b.rdate 
	order by a.row ; quit;

	proc sql; create table header_seg as select *
	from header_inst as a left join cow.segments as b 
	on a.gvkey=b.gvkey and a.datadate=b.datadate 
	order by a.row ; quit;

	proc sql; create table header_div as select *
	from header_seg as a left join cow.divpaying as b 
	on a.permno=b.permno and a.fyear=b.divyr 
	order by a.row ; quit;


*****************************************************************************************
    Classified board data file from Guernsey, Sepe, and Serfling (2022) 
	from Serfling website https://sites.google.com/utk.edu/matthew-serfling/research 
*****************************************************************************************;
	proc import datafile="C:\data\cboard.xlsx" 
	out=cboard dbms=xlsx replace; getnames=yes; run;
	proc sql; create table header_cboard as select *
	from header_div as a left join cboard as b 
	on a.gvkey=b.gvkey and a.fyear=b.fyear 
	order by a.row ; quit;


*******************************************************************************************************
    Independent director data is processed using ISS directors data
    2 SAS data files: directors (pre 2007) save as irrcdirectors
	                  rmdirectors (post 2007) save as rmdirectors
				      from /wrds/iss/sasdata/directors 
*******************************************************************************************************;

	* get directors from IRRC (pre 2007);
	data b1; set data.irrcdirectors; 
	* fix wrong cusip;
	if cusip="07383Q" then cusip="073902"; 
	if cusip="950587" then cusip="950590"; 
	if cusip="250833" then cusip="285661"; 
	if cusip="161218" then cusip="845905"; 
	if cusip="252456" then cusip="23330X"; 
	if cusip="153672" then cusip="635405"; 
	if cusip="929769" then cusip="929903"; 
	if cusip="514761" then cusip="514936"; 
	if cusip="587186" then cusip="587188"; 
	if cusip="165159" then cusip="165159"; 
	if cusip="81616W" then cusip="81616X"; 
	if classification = 'L' or classification = 'E' then D_out = 0;
	if classification = 'I' then D_out = 1;
	if cusip in ("0","") or meetingdate=. then delete; 
	keep cusip name first_name last_name meetingdate d_out;
	run;
	proc sort; by cusip meetingdate; run;

	* get directors from RiskMetrics;
	data rmdirectors; set data.rmdirectors; run;
	proc sort data=rmdirectors out=rmdirectors noduprecs; by DIRECTOR_DETAIL_ID; run;

	data b2; set rmdirectors; 
	* fix wrong cusip;
	if cusip="07383QAA2" then cusip="073902AA2"; 
	if cusip="950587105" then cusip="950590105"; 
	if cusip="250833AA8" then cusip="285661AA8"; 
	if cusip="161218102" then cusip="845905102"; 
	if cusip="252456AA6" then cusip="23330XAA6"; 
	if cusip="153672100" then cusip="635405100"; 
	if cusip="929769107" then cusip="929903107"; 
	if cusip="514761105" then cusip="514936105"; 
	if cusip="587186107" then cusip="587188107"; 
	if cusip="165159104" then cusip="165159104"; 
	if cusip="81616W204" then cusip="81616X204"; 
	if cusip="54615109" then cusip="054615109"; 
	if substr(classification,1,1) = 'I' then D_out = 1; else d_out=0;
	if cusip in ("0","") or meetingdate=. then delete; 
	keep cusip name first_name last_name meetingdate d_out;
	run;
	proc sort; by cusip meetingdate; run;

* merge 2 boards;
	data board_raw; set b1 b2; proc sort; by cusip meetingdate name; run;
	data board; set board_raw; run; 

* calculate pct of independent directors;
	proc means noprint data=board; by cusip meetingdate name; 
	output out=board_inddir (drop=_:) mean = pct_inddir; var D_out; run;

* get inddir into main file;
	proc sql; create table header_b as select *, b.meetingdate-a.datadate as days, abs(calculated days) as absdays, min(calculated absdays)as close   
	from header_cboard as a left join board as b on substr(a.cusip,1,6)=b.cn6 and b.meetingdate-365<=a.datadate<=b.meetingdate+365 
	group by a.row having calculated absdays=calculated close; quit;
	data header_b; set header_b; drop meetingdate name days absdays close cn6; run;


********************;
* Insider ownership ;
********************;

	* CEO ownership data from Thompson Reuters Insider Filing
	  "table1" SAS data file download from /wrds/tfn/sasdata/insiders ;
	data table1; set data.table1;
	if rolecode1="CEO" or rolecode2="CEO" or rolecode3="CEO" or rolecode4="CEO";
	if cleanse in ("R","H");
	if ownership="D";
	if cusip6~="";
	if sharesheld_adj~=.;
	if SECTITLE="COM";
	keep cusip6 personid owner trandate sharesheld_adj ;
	run;
	proc sort data=table1; by cusip6 personid owner trandate descending sharesheld_adj ; run;
	data cow.table1_CEOshares; set table1; by cusip6 personid owner trandate; if first.cusip6 or first.personid or first.owner or first.trandate; run;

	* CEO ownership data from Compustat Execucomp
	  "anncomp" SAS data file download from /wrds/comp/sasdata/execcomp ;
	data cow.exec_CEOshares; set data.anncomp; if CEOANN = 'CEO';	 
	fyear=year;cusip6=substr(cusip,1,6); shrown=SHROWN_EXCL_OPTS*1000;
	keep cusip6 fyear shrown; 
	run;

	* get ownership into the main file;
	proc sql; create table header_o as select distinct a.*, sum(b.shrown) as shrown 
	from header_b as a left join cow.exec_CEOshares as b 
	on substr(a.cusip,1,6)=b.cusip6 and a.fyear=b.fyear
	group by a.row; quit;
	proc sql; create table header_own as select *, a.datadate-b.trandate as days, abs(calculated days) as absdays, min(calculated absdays)as close   
	from header_o as a left join cow.table1_CEOshares as b on substr(a.cusip,1,6)=b.cusip6 and a.datadate-365<=b.trandate<=a.datadate 
	group by a.row having calculated absdays=calculated close; quit;
	data msf; set data.msf; cusip6=substr(cusip,1,6); keep date cusip6 shrout; run;
	proc sql; create table header_insideown as select a.*, b.shrout from header_own as a left join msf as b 
	on a.cusip6 = b.cusip6 and a.datadate=b.date; quit; 
	proc sort data=header_insideown; by row descending trandate; run;
	data header_insideown; set header_insideown; 	by row; if first.row; 
	ceoshares=shrown; if ceoshares=. then ceoshares=sharesheld_adj;
	ceoownership=ceoshares/(shrout*1000); 
	drop days absdays close shrout shrown  sharesheld_adj personid owner trandate;
	run; 

	* final control var file;
	data cow.controls; set header_insideown; rename row=controlrow; run;
